Exam Study Analysis by Three-Level Mixed Effect Model with Two Autoregressive Processes

Author

Tzu-Yao Lin

Published

September 5, 2025

1 Setup

First, we need to load the necessary packages.

library(tidyverse)
theme_set(theme_bw(base_size = 14))
library(lubridate)
library(tsibble)
library(cmdstanr)
register_knitr_engine(override = FALSE)
library(jagsUI)
library(posterior)
library(bayesplot)
color_scheme_set("blue")
# devtools::install_github('crsh/bridgesampling@cmdstanr-crsh', force = T)
library(bridgesampling)

Load the Exam Study data from their original csv file. There is some misalignment of the moment indices due to missing values. I adjusted it by its sampling time point. Luckily, in this experiment, everyone has the same sampling schedule.

# Load the raw ESM data; each row is one scheduled beep for one participant.
rawdata <- read_csv("data/ER exam study - data 28-6-17- 101 PP.csv")

# Rows whose scheduled time equals the previous row's are duplicated beeps.
duplicate_point <- which(rawdata$ScheduledTime == lag(rawdata$ScheduledTime))

.data <- rawdata |> 
  # Guard the empty case: slice(-integer(0)) would drop *all* rows, so only
  # remove duplicates when some were actually found.
  (\(d) if (length(duplicate_point) > 0) slice(d, -duplicate_point) else d)() |> 
  # Rename/derive the analysis variables; .keep = "none" drops the raw columns.
  mutate(Participant = cumsum(Participant != lag(Participant, default = 0)),
         Scheduled_time = ScheduledTime, 
         Year = beepyear, 
         Month = beepmonth,
         Date = beepday,
         WDay = wday(Scheduled_time, label = TRUE),
         Time = seconds_to_period(beeptime),
         Beep_index = as.integer(beepnum),
         Get_grade = (exam_beepnum >= 0),
         Missing = !beepdone, 
         Neg_aff = negaff,
         Pos_aff = posaff,
         Neg_aff_comp = negaff_composite, 
         Pos_aff_comp = posaff_composite,
         .keep = "none") |> 
  group_by(Participant) |> 
  # A new calendar date starts a new study day for that participant.
  mutate(Day = cumsum(Date != lag(Date, default = 0))) |> 
  group_by(Participant, Day) |> 
  # Provisional moment index within the day; n_is_10 flags complete days.
  mutate(Moment = seq_len(n()), 
         n_in_day = n(), 
         n_is_10 = n_in_day == 10) |> 
  ungroup()

# Adjust the misalignment of the moment number and deal with the missing data.
# Beeps run from 10:00 to 22:00; split that window into 10 equal bins so a
# beep's clock time can be mapped back to its moment index (1-10).
start_time <- hours(10) |> period_to_seconds()
end_time <- hours(22) |> period_to_seconds()
cut_points <- seq(start_time, end_time, length.out = 11)

data <- .data |> 
  # If a day already has exactly 10 beeps, keep the observed moment index;
  # otherwise re-derive the index from the sampling time via the cut points.
  mutate(Moment = pmap_int(list(Moment, n_is_10, Time), 
                              \(Moment, n_is_10, Time){
                                if (n_is_10) {
                                  Moment
                                } else {
                                  as.integer(cut(period_to_seconds(Time), 
                                                 breaks = cut_points, 
                                                 labels = 1:10))
                                }
                              })) |> 
  # Complete the 101 x 9 x 10 grid so missing beeps become explicit NA rows.
  right_join(expand.grid(Participant = 1:101, 
                         Day = 1:9,
                         Moment = 1:10)) |>
  arrange(Participant, Day, Moment) |> 
  mutate(Missing = is.na(Neg_aff)) |> 
  group_by(Participant) |> 
  # Get_grade is constant within a participant; fill it into the NA rows.
  fill(Get_grade, .direction = "downup") |> 
  ungroup()

# Cache the preprocessed data so later chunks can reload it directly.
write_rds(data, "data/exam_data_preprocessed.rds")
data <- read_rds("data/exam_data_preprocessed.rds")
rmarkdown::paged_table(data)

There are two types of negative (or positive) affect measurements in the data:

  • Neg_aff: The negative affect scores are reported directly from the participants at each beep
  • Neg_aff_comp: The scores are the average of the 6 negative affect items, including sad, angry, disappointed, ashamed, anxious, and stressed.

Note that, in my previous analysis, I used the Neg_aff_comp as the outcome variable. But here I will use the Neg_aff instead, because it is more concise for our model, which is based on the single measurement rather than the composite score.

The data structure is as follows:

  • Level 3: Participant (N = 101)
  • Level 2: Day (D = 9) per each participant
  • Level 1: Moment (M = 10) per each day and each participant.

Therefore, each participant has 90 measurements in total. However, due to the missing data, the actual number of observations is less than 9090 (= 101 * 9 * 10).

# Spaghetti plot of all participants' negative-affect trajectories with an
# overall smoothed trend.
day_axis_breaks <- as_datetime(seq_len(9) * 86400)

data |>
  mutate(Date_time = as_datetime(days(Day) + Time)) |>
  ggplot(aes(x = Date_time, y = Neg_aff)) +
  geom_line(aes(group = factor(Participant)), color = "grey") +
  geom_point(color = "grey") +
  geom_smooth() +
  coord_cartesian(ylim = c(0, 100)) +
  scale_x_datetime(breaks = day_axis_breaks,
                   labels = paste("Day", seq_len(9)))
Figure 1
library(dtwclust)
library(imputeTS)

# One negative-affect series (length 90) per participant, in participant
# order; `data` is already arranged by Participant, Day, Moment.
ts <- data$Neg_aff |>
  split(data$Participant) |>
  unname()

# Kalman-smoothing imputation of the missing beeps within each series.
ts_imputed <- map(ts, na_kalman)

# Partitional clustering with DTW distance and DBA centroids; the seed fixes
# the random initialization.
clu <- tsclust(ts_imputed, type = "partitional", k = 9, distance = "dtw_basic", centroid = "dba", seed = 20250829)

# Trajectories faceted by the DTW cluster assignment (one color per subject).
data |>
  add_column(Cluster = factor(rep(clu@cluster, each = 90))) |>
  mutate(Date_time = as_datetime(days(Day) + Time)) |>
  ggplot(aes(x = Date_time, y = Neg_aff, color = factor(Participant))) +
  geom_line() +
  geom_point() +
  coord_cartesian(ylim = c(0, 100)) +
  scale_x_datetime(breaks = as_datetime(seq_len(9) * 86400),
                   labels = paste("Day", seq_len(9))) +
  facet_wrap(~ Cluster, ncol = 3) +
  theme(legend.position = "none")
Figure 2
# Participant-by-day matrix (101 x 9) of daily mean negative affect.
ave_na_by_day <- data |>
  summarise(ave_neg_aff = mean(Neg_aff, na.rm = TRUE),
            .by = c(Participant, Day)) |>
  pivot_wider(names_from = Day, values_from = ave_neg_aff,
              names_prefix = "Day_") |>
  select(-Participant) |>
  as.matrix()

# Day-to-day covariance of the daily means.
var(ave_na_by_day)
          Day_1    Day_2     Day_3     Day_4     Day_5     Day_6     Day_7
Day_1 129.70497  91.6320  60.27316  83.39461  99.44065  73.82906  87.41512
Day_2  91.63200 207.6443 159.87995 164.20113 118.88478 142.21875 147.58042
Day_3  60.27316 159.8800 533.55532 356.82250 255.39963 218.83776 201.97480
Day_4  83.39461 164.2011 356.82250 446.80186 276.67894 245.13227 229.46225
Day_5  99.44065 118.8848 255.39963 276.67894 329.82294 236.26896 209.30110
Day_6  73.82906 142.2188 218.83776 245.13227 236.26896 284.08531 212.70408
Day_7  87.41512 147.5804 201.97480 229.46225 209.30110 212.70408 286.32530
Day_8  77.04150 141.6025 168.18935 179.52719 186.54610 197.99167 214.41935
Day_9  75.25713 124.3156 121.22220 153.44870 149.49768 185.42691 188.68094
         Day_8     Day_9
Day_1  77.0415  75.25713
Day_2 141.6025 124.31555
Day_3 168.1894 121.22220
Day_4 179.5272 153.44870
Day_5 186.5461 149.49768
Day_6 197.9917 185.42691
Day_7 214.4193 188.68094
Day_8 231.9235 188.12425
Day_9 188.1242 212.79044
# Day-to-day correlation of the daily mean negative affect.
cor(ave_na_by_day)
          Day_1     Day_2     Day_3     Day_4     Day_5     Day_6     Day_7
Day_1 1.0000000 0.5583531 0.2291163 0.3464194 0.4807787 0.3846136 0.4536056
Day_2 0.5583531 1.0000000 0.4803351 0.5390868 0.4542824 0.5855615 0.6052557
Day_3 0.2291163 0.4803351 1.0000000 0.7308109 0.6088217 0.5620929 0.5167466
Day_4 0.3464194 0.5390868 0.7308109 1.0000000 0.7207393 0.6880478 0.6415402
Day_5 0.4807787 0.4542824 0.6088217 0.7207393 1.0000000 0.7718659 0.6810849
Day_6 0.3846136 0.5855615 0.5620929 0.6880478 0.7718659 1.0000000 0.7457986
Day_7 0.4536056 0.6052557 0.5167466 0.6415402 0.6810849 0.7457986 1.0000000
Day_8 0.4441954 0.6452656 0.4781194 0.5576996 0.6744866 0.7713476 0.8320735
Day_9 0.4529949 0.5914111 0.3597629 0.4976565 0.5643103 0.7541753 0.7644025
          Day_8     Day_9
Day_1 0.4441954 0.4529949
Day_2 0.6452656 0.5914111
Day_3 0.4781194 0.3597629
Day_4 0.5576996 0.4976565
Day_5 0.6744866 0.5643103
Day_6 0.7713476 0.7541753
Day_7 0.8320735 0.7644025
Day_8 1.0000000 0.8468302
Day_9 0.8468302 1.0000000

2 Model

The general model is: RI_s + RI_d (Hetero) + AR_d + AR_m + Error (Hetero)

The current model used here is: RI_s + RI_d (Homo) + AR_d + AR_m + Error (Homo).

3 Reliability

\(R_T\)

\[ \begin{aligned} R_T &= 1 - \frac{\operatorname{tr}(\boldsymbol{\Sigma}_{R})}{\operatorname{tr}(\boldsymbol{V})} = \frac{\operatorname{tr}(\mathbf{Z} \boldsymbol{\Psi} \mathbf{Z}^\top)}{\operatorname{tr}(\mathbf{Z} \boldsymbol{\Psi} \mathbf{Z}^\top) + \operatorname{tr}(\boldsymbol{\Sigma}_D) + \operatorname{tr}(\boldsymbol{\Sigma}_M) + \operatorname{tr}(\boldsymbol{\Sigma}_E)} \\ &= \frac{\sigma_s^2 + \bar{\sigma_{d\cdot}^2}}{\sigma_s^2 + \bar{\sigma_{d\cdot}^2} + \tau_d^2 + \tau_m^2 + \bar{\sigma_{\epsilon\cdot}^2}} . \end{aligned} \]

where \(\bar{\sigma_{d\cdot}^2} = \sum_{j=1}^{D}\frac{\sigma_{d j}^2}{D}\) and \(\bar{\sigma_{\epsilon\cdot}^2} = \sum_{k=1}^{M}\frac{\sigma_{\epsilon k}^2}{M}\).

\(R_\Lambda\)

\[ R_{\Lambda} = 1 - \frac{|\boldsymbol{\Sigma}_{R}|}{|\boldsymbol{V}|} \]

Reliability Between-person Within-person
Moment-by-moment \(\rho_{\tiny M}^{\tiny B} = \Cor(y_{ijk}, y_{ijk'}) = \frac{\sigma_s^2 + \sigma_{d j}^2 + \tau_d^2 +\tau_m^2\phi_m^{|k-k'|}}{\sqrt{\sigma_s^2 + \sigma_{d j}^2 + \tau_d^2 +\tau_m^2 + \sigma_{\epsilon k}^2}\sqrt{\sigma_s^2 + \sigma_{d j}^2 + \tau_d^2 +\tau_m^2 + \sigma_{\epsilon k'}^2}}\) \(\rho_{\tiny M}^{\tiny W} = \Cor(y_{ijk}, y_{ijk'} \mid i) = \frac{\sigma_{d j}^2 + \tau_d^2 +\tau_m^2\phi_m^{|k-k'|}}{\sqrt{\sigma_{d j}^2 + \tau_d^2 +\tau_m^2 + \sigma_{\epsilon k}^2}\sqrt{\sigma_{d j}^2 + \tau_d^2 +\tau_m^2 + \sigma_{\epsilon k'}^2}}\)
Day-to-Day (single) \(\rho_{\tiny D}^{\tiny B} = \Cor(y_{ijk}, y_{ij'k'}) = \frac{\sigma_s^2 + \tau_d^2\phi_d^{|j-j'|}}{\sqrt{\sigma_s^2 + \sigma_{d j}^2 + \tau_d^2 +\tau_m^2 + \sigma_{\epsilon k}^2}\sqrt{\sigma_s^2 + \sigma_{d j'}^2 + \tau_d^2 +\tau_m^2 + \sigma_{\epsilon k'}^2}}\) \(\rho_{\tiny D}^{\tiny W} = \Cor(y_{ijk}, y_{ij'k'} \mid i) = \frac{\tau_d^2\phi_d^{|j-j'|}}{\sqrt{\sigma_{d j}^2 + \tau_d^2 +\tau_m^2 + \sigma_{\epsilon k}^2}\sqrt{\sigma_{d j'}^2 + \tau_d^2 +\tau_m^2 + \sigma_{\epsilon k'}^2}}\)
Day-to-Day (average) \(\rho_{\tiny \bar{D}}^{\tiny B} = \Cor(\bar{y}_{ij\cdot}, \bar{y}_{ij'\cdot}) = \frac{\sigma_s^2 + \tau_d^2\phi_d^{|j-j'|}}{\sqrt{\sigma_s^2 + \sigma_{d j}^2 + \tau_d^2 + \bar{\tau_m^2} + \bar{\sigma_{\epsilon\cdot}^2}}\sqrt{\sigma_s^2 + \sigma_{d j'}^2 + \tau_d^2 + \bar{\tau_m^2} + \bar{\sigma_{\epsilon\cdot}^2}}}\) \(\rho_{\tiny \bar{D}}^{\tiny W} = \Cor(\bar{y}_{ij\cdot}, \bar{y}_{ij'\cdot} \mid i) = \frac{\tau_d^2\phi_d^{|j-j'|}}{\sqrt{\sigma_{d j}^2 + \tau_d^2 + \bar{\tau_m^2} + \bar{\sigma_{\epsilon\cdot}^2}}\sqrt{\sigma_{d j'}^2 + \tau_d^2 + \bar{\tau_m^2} + \bar{\sigma_{\epsilon\cdot}^2}}}\)

where \(\bar{\tau_m^2} = \frac{\tau_m^2}{M}\) and \(\bar{\sigma_{\epsilon .}^2} = \sum_{k=1}^{M}\frac{\sigma_{\epsilon k}^2}{M}\).

4 Fitting

# Data dimensions: subjects (level 3), days (level 2), moments (level 1).
N <- 101
D <- 9
M <- 10
# Identifier shared by the Stan file, the cached draws, and the summary csv.
model_name <- "exam_3llmm_RIsRIdHOdARdARmERmHOm_nonc_m_long"

Note that, I used the non-centered parameterization for the random effects to improve the sampling efficiency. As for the AR(1) processes, I used the Cholesky decomposition to handle the multivariate normal distribution with the AR(1) covariance structure.

stan/exam_3llmm_RIsRIdHOdARdARmERmHOm_nonc_m.stan
functions {
  // AR(1) correlation matrix generator: entry (i, j) is phi^|i - j|,
  // i.e. the correlation structure of a stationary AR(1) process at
  // equally spaced points.
  matrix ar1_corr_matrix(int m, real phi) {
    matrix[m, m] h;
    for (i in 1:m)
      for (j in 1:m)
        h[i, j] = phi ^ abs(i - j);
    return h;
  }
}

data {
  int<lower=1> N; // number of subjects
  int<lower=1> D; // number of days
  int<lower=1> M; // number of time points (moments) per day
  // Observed (non-missing) measurements in long format: y_obs[l] was taken
  // from subject ii_obs[l] on day jj_obs[l] at moment kk_obs[l].
  int<lower=0, upper=N*D*M> N_obs;
  array[N_obs] int<lower=1, upper=N> ii_obs;
  array[N_obs] int<lower=1, upper=D> jj_obs;
  array[N_obs] int<lower=1, upper=M> kk_obs;
  vector[N_obs] y_obs;
}

parameters {
  real beta; // grand mean (fixed effect)
  vector[N] s_raw; // non-centered subject random intercepts
  real<lower=0> sigma_s; // population sd for the subject effect
  
  array[N] vector[D] d_raw; // non-centered day random intercepts
  real<lower=0> sigma_d; // population sd for the day effect (single sd: homogeneous across days)

  array[N] vector[D] nu_raw; // non-centered day-level AR(1) deviations
  real<lower=-1, upper=1> phi_d; // autoregressive parameter between days
  real<lower=0> tau_d; // marginal sd of the day-level AR(1) (Sigma_d = tau_d^2 * H_d)
  
  array[N, D] vector[M] omega_raw; // non-centered moment-level AR(1) deviations
  real<lower=-1, upper=1> phi_m; // autoregressive parameter between moments
  real<lower=0> tau_m; // marginal sd of the moment-level AR(1) (Sigma_m = tau_m^2 * H_m)
  
  real<lower=0> sigma_epsilon; // population sd for the measurement error (single sd: homogeneous across moments)
}

transformed parameters {
  vector[N] s = sigma_s * s_raw; // subject effect (random effect)
  
  array[N] vector[D] d; // day(/subject) effect (random effect)
  for (i in 1:N) {
    d[i] = sigma_d * d_raw[i];
  }
  
  // Day-level AR(1): nu[i] ~ MVN(0, tau_d^2 * H_d), realized by multiplying
  // iid standard-normal nu_raw by the Cholesky factor (non-centered form).
  corr_matrix[D] H_d = ar1_corr_matrix(D, phi_d);
  cov_matrix[D] Sigma_d = tau_d^2 * H_d;
  matrix[D, D] L_Sigma_d = cholesky_decompose(Sigma_d);
  array[N] vector[D] nu;
  for (i in 1:N) {
    nu[i] = L_Sigma_d * nu_raw[i];
  }

  // Moment-level AR(1): omega[i, j] ~ MVN(0, tau_m^2 * H_m), one process per
  // subject-day, via the same Cholesky trick.
  corr_matrix[M] H_m = ar1_corr_matrix(M, phi_m);
  cov_matrix[M] Sigma_m = tau_m^2 * H_m;
  matrix[M, M] L_Sigma_m = cholesky_decompose(Sigma_m);
  array[N, D] vector[M] omega;
  for (i in 1:N) {
    for (j in 1:D) {
      omega[i, j] = L_Sigma_m * omega_raw[i, j];
    }
  }
}

model {
  // priors
  // Half-t priors on sds: "- log(0.5)" renormalizes the student-t density
  // after truncation to the positive half-line.
  target += normal_lpdf(beta | 50, 100);
  target += student_t_lpdf(sigma_s | 3, 0, 2.5) - log(0.5);
  target += student_t_lpdf(sigma_d | 3, 0, 2.5) - log(0.5);
  target += student_t_lpdf(tau_d | 3, 0, 2.5) - log(0.5);
  // Normal priors on the AR parameters, truncated to (-1, 1); the
  // log_diff_exp term is the log normalizing constant of the truncation.
  target += normal_lpdf(phi_d | 0, 0.5) 
        - log_diff_exp(normal_lcdf(1 | 0, 0.5), normal_lcdf(-1  | 0, 0.5));
  target += student_t_lpdf(tau_m | 3, 0, 2.5) - log(0.5);
  target += normal_lpdf(phi_m | 0, 0.5) 
            - log_diff_exp(normal_lcdf(1 | 0, 0.5), normal_lcdf(-1  | 0, 0.5));
  target += student_t_lpdf(sigma_epsilon | 3, 0, 2.5) - log(0.5);
  
  // Level 3: standard-normal raw subject effects (non-centered)
  target += std_normal_lpdf(s_raw);

  // Level 2: raw day intercepts and raw day-level AR innovations
  for (i in 1:N) {
    target += std_normal_lpdf(d_raw[i]);
    target += std_normal_lpdf(nu_raw[i]);
  }

  // Level 1: raw moment-level AR innovations
  for (i in 1:N) {
    for (j in 1:D) {
      target += std_normal_lpdf(omega_raw[i, j]);
    }
  }

  // Likelihood: mean = grand mean + subject + day intercept + day AR +
  // moment AR; homogeneous measurement-error sd.
  vector[N_obs] mu_obs;
  for (l in 1:N_obs) {
      mu_obs[l] = beta 
                  + s[ii_obs[l]] 
                  + d[ii_obs[l], jj_obs[l]] 
                  + nu[ii_obs[l], jj_obs[l]] 
                  + omega[ii_obs[l], jj_obs[l], kk_obs[l]];
  }
  target += normal_lpdf(y_obs | mu_obs, sigma_epsilon);
}
# Reassemble the fitted model from the cached CmdStan csv output files.
draw_files <- list.files(str_glue("stan/draws/{model_name}"),
                         pattern = "csv", full.names = TRUE)
mod_fit <- as_cmdstan_fit(draw_files)

5 Results

mod_summary <- read_csv(str_glue("stan/summary/{model_name}_summary.csv"))

# Keep only the population-level parameters (plus lp__) for display.
key_pars <- mod_summary |>
  filter(
    variable %in% c("lp__", "beta") |
      str_starts(variable, "sigma_") |
      str_starts(variable, "phi_") |
      str_starts(variable, "eta_") |
      str_starts(variable, "tau_")
  )
print(key_pars, n = Inf, width = Inf)
# A tibble: 9 × 10
  variable            mean     median       sd      mad         q5        q95
  <chr>              <dbl>      <dbl>    <dbl>    <dbl>      <dbl>      <dbl>
1 lp__          -46968.    -47155     873.     641.     -47945.    -45333.   
2 beta              19.5       19.5     1.26     1.26       17.4       21.5  
3 sigma_s           10.4       10.5     1.60     1.25        7.95      12.6  
4 sigma_d            1.49       1.28    1.08     1.11        0.127      3.55 
5 phi_d              0.573      0.568   0.0761   0.0717      0.457      0.706
6 tau_d             11.2       11.1     0.880    0.767       9.98      12.7  
7 phi_m              0.453      0.455   0.0777   0.0775      0.320      0.578
8 tau_m             11.2       11.1     0.842    0.781       9.96      12.7  
9 sigma_epsilon     10.8       11.0     1.04     0.852       8.82      12.1  
   rhat ess_bulk ess_tail
  <dbl>    <dbl>    <dbl>
1  1.01     271.     194.
2  1.00   14677.   15260.
3  1.00    6030.    3705.
4  1.00    3105.    3101.
5  1.00    3771.    2952.
6  1.00    6314.    4052.
7  1.01     298.     251.
8  1.01     284.     207.
9  1.01     269.     190.

The convergence diagnostics look good. See https://xup6y3ul6.github.io/exam_study_analysis/results/exam_3llmm_RIsRIdHOdARdARmERmHOm_nonc_m_long_result.html for details.

5.1 Posterior distributions of variance and autoregressive parameters

mod_draws <- mod_fit$draws(format = "matrix")

# Posterior intervals for the sd parameters (sigma_* and tau_*);
# ignore.case = FALSE avoids matching e.g. "Sigma_d[...]" columns.
mcmc_intervals(
  mod_draws,
  pars = vars(starts_with("sigma_", ignore.case = FALSE),
              starts_with("tau_"))
)
Figure 3
# Posterior intervals for the two autoregressive parameters (phi_d, phi_m).
mcmc_intervals(mod_draws, pars = vars(starts_with("phi_")))
Figure 4

5.2 Overall fitted trend

# Build posterior draws of the fitted values y_hat[i,j,k] from the component
# draws.
#
# draws: posterior draws matrix containing "beta", "s[i]", "d[i,j]" columns
#   and, when include_AR = TRUE, "nu[i,j]" and "omega[i,j,k]" columns.
# include_AR: if TRUE, add the day- and moment-level AR(1) components on top
#   of the random intercepts.
# n_subj, n_day, n_moment: data dimensions; default to the globals N, D, M so
#   all existing calls keep working unchanged.
#
# Returns a draws x (n_subj * n_day * n_moment) matrix whose columns are
# named "y_hat[i,j,k]".
get_y_hat_draws <- function(draws, include_AR = FALSE,
                            n_subj = N, n_day = D, n_moment = M) {
  n_cells <- n_subj * n_day * n_moment
  y_hat_draws <- matrix(NA, nrow = nrow(draws), ncol = n_cells)
  var_names <- vector("character", length = n_cells)

  beta <- draws[, "beta"]
  for (i in seq_len(n_subj)) {
    # beta + s[i] is constant over (j, k): extract it once per subject
    # instead of inside the innermost loop.
    base_i <- beta + draws[, str_glue("s[{i}]")]
    for (j in seq_len(n_day)) {
      # The day-level terms are constant over k: hoist them out of the k-loop.
      base_ij <- base_i + draws[, str_glue("d[{i},{j}]")]
      if (include_AR) {
        base_ij <- base_ij + draws[, str_glue("nu[{i},{j}]")]
      }
      for (k in seq_len(n_moment)) {
        index_ijk <- (i - 1) * n_day * n_moment + (j - 1) * n_moment + k
        var_names[index_ijk] <- str_glue("y_hat[{i},{j},{k}]")

        if (include_AR) {
          y_hat_draws[, index_ijk] <- base_ij +
            draws[, str_glue("omega[{i},{j},{k}]")]
        } else {
          y_hat_draws[, index_ijk] <- base_ij
        }
      }
    }
  }
  colnames(y_hat_draws) <- var_names
  return(y_hat_draws)
}

The blue line is: \(\hat{y}_{ijk} = \hat{\beta} + \hat{s}_i + \hat{d}_{ij} + \hat{\nu}_{ij} + \hat{\omega}_{ijk}\).

The red line is the fit without the AR components: \(\hat{y}_{ijk} = \hat{\beta} + \hat{s}_i + \hat{d}_{ij}\).

y_hat_draws <- get_y_hat_draws(mod_draws, include_AR = TRUE)
y_hat_summary <- summarise_draws(y_hat_draws, mean, median, quantile2)

# Parse the "y_hat[i,j,k]" variable names back into index columns and attach
# the observed data.
data_predict <- y_hat_summary |>
  mutate(Indices = str_extract_all(variable, "\\d+"),
         Participant = map_dbl(Indices, \(idx) as.integer(idx[1])),
         Day = map_dbl(Indices, \(idx) as.integer(idx[2])),
         Moment = map_dbl(Indices, \(idx) as.integer(idx[3]))) |>
  right_join(data)

# Participants highlighted in the fitted-trend figures.
selected_subj <- c(5, 32, 38, 47, 58, 66, 71, 99, 101)

# Background shading: odd-numbered days get a grey band to ease reading.
rect_data <- data.frame(
  Day = 1:9,
  xmin = as_datetime(1:9 * 86400),
  xmax = as_datetime(2:10 * 86400),
  ymin = -Inf,
  ymax = Inf,
  color = ifelse(1:9 %% 2 == 1, "grey85", NA)
)

g_yhat <- data_predict |>
  filter(Participant %in% selected_subj) |>
  mutate(Date_time = as_datetime(days(Day) + Time)) |>
  ggplot(aes(x = Date_time, y = Neg_aff)) +
  # Alternating day bands drawn first so the data sit on top.
  geom_rect(
    data = rect_data,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = color),
    inherit.aes = FALSE,
    alpha = 0.5
  ) +
  geom_line() +
  geom_point() +
  # Posterior mean (dashed) and 90% interval of the fitted values.
  geom_line(aes(y = mean), linetype = "dashed", color = "cornflowerblue") +
  geom_ribbon(aes(ymin = q5, ymax = q95), alpha = 0.25, fill = "cornflowerblue") +
  coord_cartesian(ylim = c(-20, 100)) +
  scale_x_datetime(breaks = as_datetime(seq_len(9) * 86400),
                   labels = paste("Day", seq_len(9))) +
  scale_y_continuous(breaks = seq(0, 100, by = 25)) +
  scale_fill_identity() +
  facet_grid(Participant ~ ., space = "free_y")
g_yhat
Figure 5: The fitted results

However, if I remove the AR components, the fitted results look much worse.

y_hat_woAR_draws <- get_y_hat_draws(mod_draws, include_AR = FALSE)
y_hat_woAR_summary <- summarise_draws(y_hat_woAR_draws, mean, median, quantile2)

# Same index parsing as above, restricted to the selected participants.
data_predict2 <- y_hat_woAR_summary |>
  mutate(Indices = str_extract_all(variable, "\\d+"),
         Participant = map_dbl(Indices, \(idx) as.integer(idx[1])),
         Day = map_dbl(Indices, \(idx) as.integer(idx[2])),
         Moment = map_dbl(Indices, \(idx) as.integer(idx[3]))) |>
  right_join(data) |>
  filter(Participant %in% selected_subj) |>
  mutate(Date_time = as_datetime(days(Day) + Time))

# Overlay the without-AR fit (red) on the with-AR figure (blue).
g_yhat2 <- g_yhat +
  geom_line(data = data_predict2, aes(x = Date_time, y = mean),
            linetype = "dashed", color = "lightcoral") +
  geom_ribbon(data = data_predict2, aes(x = Date_time, ymin = q5, ymax = q95),
              alpha = 0.25, fill = "lightcoral")
g_yhat2

Figure 8: The fitted results with/without the AR components

5.3 Reliability

# Trace-based reliability R_T for the homoscedastic model: the share of the
# total variance due to the subject and day random intercepts.
get_R_T_draws <- function(draws) {
  var_s <- draws[, "sigma_s"]^2
  var_d <- draws[, "sigma_d"]^2
  var_nu <- draws[, "tau_d"]^2
  var_omega <- draws[, "tau_m"]^2
  var_eps <- draws[, "sigma_epsilon"]^2

  R_T <- (var_s + var_d) /
    (var_s + var_d + var_nu + var_omega + var_eps)
  colnames(R_T) <- "R_T"
  return(R_T)
}

# Generalized (determinant-based) reliability R_Lambda = 1 - |Sigma_R| / |V|,
# computed for every posterior draw.
#
# draws: posterior draws matrix with "sigma_s", "sigma_d", "Sigma_d[.,.]",
#   "Sigma_m[.,.]", "sigma_epsilon" columns. Uses the globals D and M.
get_R_Lambda_draws <- function(draws) {
  draws_list <- list(
    sigma_s = draws[, "sigma_s"],
    sigma_d = draws[, "sigma_d"],
    # Reassemble the D x D and M x M AR covariance matrices from their
    # flattened column layout, one matrix per draw.
    Sigma_d = draws[, str_glue("Sigma_d[{rep(1:D, each = D)},{rep(1:D, times = D)}]")] |>
      asplit(1) |>
      map(~ matrix(.x, nrow = D, ncol = D)),
    Sigma_m = draws[, str_glue("Sigma_m[{rep(1:M, each = M)},{rep(1:M, times = M)}]")] |>
      asplit(1) |>
      map(~ matrix(.x, nrow = M, ncol = M)),
    sigma_epsilon = draws[, "sigma_epsilon"])

  calc_R_Lambda <- function(sigma_s, sigma_d, Sigma_d, Sigma_m, sigma_epsilon){
    # Design matrix mapping the subject intercept and the D day intercepts to
    # the D*M observations of one subject.
    Z <- cbind(1, diag(rep(1, D))) %x% rep(1, M)
    # Psi holds the *variances* of the random intercepts; sigma_s and sigma_d
    # are standard deviations, so they must be squared (cf. R_T above, which
    # uses sigma_s^2 and sigma_d^2).
    Psi <- diag(c(sigma_s^2, rep(sigma_d^2, D)))
    Sigma_D <- Sigma_d %x% matrix(1, nrow = M, ncol = M)
    Sigma_M <- diag(rep(1, D)) %x% Sigma_m
    # Measurement-error covariance: error *variance* on the diagonal.
    Sigma_E <- diag(rep(sigma_epsilon^2, D * M))

    Sigma_R <- Sigma_D + Sigma_M + Sigma_E
    V <- Z %*% Psi %*% t(Z) + Sigma_R

    # Return the calculated value
    1 - det(Sigma_R) / det(V)
  }

  R_Lambda <- furrr::future_pmap_dbl(draws_list, calc_R_Lambda, .progress = TRUE)
  R_Lambda <- matrix(R_Lambda, ncol = 1)
  colnames(R_Lambda) <- "R_Lambda"
  return(R_Lambda)
}

# Posterior draws of a model-implied correlation between two observations.
#
# days, moments: length-2 vectors with the (day, moment) indices of the two
#   observations being correlated.
# level: "B" = between-person (numerator/denominator include sigma_s^2),
#   "W" = within-person (conditional on the subject).
# ave_by_day: correlate the *day averages* instead of single moments; the
#   moment-level variances are then divided by M (tau_m^2/M, sigma_eps^2/M).
# Uses the global M. Returns a one-column matrix of draws, named e.g.
# "rho(11,21)_B".
get_rho_draws <- function(draws, days, moments, level = c("B", "W"), ave_by_day = FALSE) {
  day_gap <- abs(diff(days))
  moment_gap <- abs(diff(moments))
  if (ave_by_day == TRUE) moments <- c(".", ".")

  sigma_s <- draws[, "sigma_s"]
  sigma_d <- draws[, "sigma_d"]
  tau_d <- draws[, "tau_d"]
  phi_d <- draws[, "phi_d"]
  phi_m <- draws[, "phi_m"]
  # BUG FIX: the original used ifelse(ave_by_day, x/sqrt(M), x). With a
  # length-1 condition, ifelse() returns a length-1 result, silently
  # collapsing the whole draws column to its *first* draw and discarding the
  # posterior uncertainty in tau_m and sigma_epsilon. Use a scalar `if`.
  if (ave_by_day) {
    tau_m <- draws[, "tau_m"] / sqrt(M)
    sigma_epsilon <- draws[, "sigma_epsilon"] / sqrt(M)
  } else {
    tau_m <- draws[, "tau_m"]
    sigma_epsilon <- draws[, "sigma_epsilon"]
  }

  # Indicator terms (day_gap == 0): the day intercept and the moment-level
  # AR term are shared only when both observations fall on the same day.
  if (level == "B") {
    rho <- (sigma_s^2 + (day_gap==0)*(sigma_d^2) + tau_d^2*phi_d^day_gap + (day_gap==0)*(tau_m^2 * phi_m^moment_gap)) /
      (sigma_s^2 + sigma_d^2 + tau_d^2 + tau_m^2 + sigma_epsilon^2)
  } else if (level == "W") {
    rho <- ((day_gap==0)*(sigma_d^2) + tau_d^2*phi_d^day_gap + (day_gap==0)*(tau_m^2 * phi_m^moment_gap)) /
      (sigma_d^2 + tau_d^2 + tau_m^2 + sigma_epsilon^2)
  } else {
    stop("level must be either 'B' or 'W'")
  }
  colnames(rho) <- str_glue("rho({days[1]}{moments[1]},{days[2]}{moments[2]})_{level}")
  return(rho)
}
R_T_draws <- get_R_T_draws(mod_draws)
R_Lambda_draws <- get_R_Lambda_draws(mod_draws)
R_T_Lambda <- cbind(R_T_draws, R_Lambda_draws)

# Lags at which to evaluate the correlations: moment 1 vs moments 2-10
# (within day 1) and day 1 vs days 2-9 (at moment 1).
second_moments <- 2:10
second_days <- 2:9

rho_m2m_B <- second_moments |>
  map(\(m2) get_rho_draws(mod_draws, days = c(1, 1), moments = c(1, m2), level = "B")) |>
  reduce(cbind)
rho_d2d_B <- second_days |>
  map(\(d2) get_rho_draws(mod_draws, days = c(1, d2), moments = c(1, 1), level = "B")) |>
  reduce(cbind)
rho_d2d_ave_B <- second_days |>
  map(\(d2) get_rho_draws(mod_draws, days = c(1, d2), moments = c(1, 1), level = "B", ave_by_day = TRUE)) |>
  reduce(cbind)
rho_m2m_W <- second_moments |>
  map(\(m2) get_rho_draws(mod_draws, days = c(1, 1), moments = c(1, m2), level = "W")) |>
  reduce(cbind)
rho_d2d_W <- second_days |>
  map(\(d2) get_rho_draws(mod_draws, days = c(1, d2), moments = c(1, 1), level = "W")) |>
  reduce(cbind)
rho_d2d_ave_W <- second_days |>
  map(\(d2) get_rho_draws(mod_draws, days = c(1, d2), moments = c(1, 1), level = "W", ave_by_day = TRUE)) |>
  reduce(cbind)

# All reliability draws side by side for plotting.
rel <- cbind(R_T_Lambda,
             rho_m2m_B, rho_d2d_B, rho_d2d_ave_B,
             rho_m2m_W, rho_d2d_W, rho_d2d_ave_W)

# Between-person reliabilities: R_T, R_Lambda, and all *_B correlations.
g_rel_B <- mcmc_intervals(
  rel,
  pars = vars(starts_with("R", ignore.case = FALSE) | ends_with("_B"))
) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2))
g_rel_B
Figure 6
color_scheme_set("red")
# Within-person reliabilities: all *_W correlations.
g_rel_W <- mcmc_intervals(rel, pars = vars(ends_with("_W"))) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2))

g_rel_W
Figure 7

6 Model comparison?

# NOTE(review): `mod_fit_stan` is not defined anywhere in this document (only
# `mod_fit` is) -- presumably it is the original CmdStanR fit object from the
# sampling run; confirm. This first call is also immediately superseded by
# the my_bridge_sampler() call below.
mod_bs <- bridge_sampler(mod_fit_stan)
# Bridge-sampling wrapper for a CmdStanR fit: hands bridgesampling the
# unconstrained draws plus a log-posterior callback that delegates to the
# fitted model's log_prob().
my_bridge_sampler <- function(fit, ...) {
  
  usamples <- fit$unconstrain_draws(format = "matrix")
  # Evaluate the model's log density at a point on the unconstrained scale.
  log_posterior_stan <- function(pars, ...) {
    fit$log_prob(unconstrained_variables = pars)
  }
  param_names <- colnames(usamples)
  # On the unconstrained scale every parameter is unbounded, hence the
  # (-Inf, Inf) bounds; the commented lines would only apply to the
  # *constrained* scale.
  ub <- setNames(rep(Inf, length(param_names)), param_names)
  lb <- setNames(rep(-Inf, length(param_names)), param_names)
  # sigma_cols <- param_names[startsWith(param_names, "sigma")]
  # lb[sigma_cols] <- 0

  bs <- bridge_sampler(
    samples = usamples,
    log_posterior = log_posterior_stan,
    lb = lb,
    ub = ub,
    silent = TRUE
  )
  return(bs)
}

mod_bs <- my_bridge_sampler(mod_fit_stan)

mod_bs

7 Discussion?